# set up the environment
library(Seurat)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggplot2)
library(reshape2)
library(CelltypeR)
Warning: replacing previous import ‘data.table::last’ by ‘dplyr::last’ when loading ‘CelltypeR’
Warning: replacing previous import ‘data.table::first’ by ‘dplyr::first’ when loading ‘CelltypeR’
Warning: replacing previous import ‘data.table::between’ by ‘dplyr::between’ when loading ‘CelltypeR’
Warning: replacing previous import ‘dplyr::filter’ by ‘flowCore::filter’ when loading ‘CelltypeR’
Warning: replacing previous import ‘ggplot2::margin’ by ‘randomForest::margin’ when loading ‘CelltypeR’
Warning: replacing previous import ‘dplyr::combine’ by ‘randomForest::combine’ when loading ‘CelltypeR’
Warning: replacing previous import ‘flowCore::filter’ by ‘dplyr::filter’ when loading ‘CelltypeR’
Warning: replacing previous import ‘flowViz::contour’ by ‘graphics::contour’ when loading ‘flowStats’
Attaching package: ‘CelltypeR’
The following object is masked from ‘package:ggplot2’:
annotate
Figure 3A - predicted correlation matrix
hm <- heatmap(mat,
Rowv = NA, # Don't cluster rows
Colv = NA, # Don't cluster columns
col = colorRampPalette(c("white", "blue"))(100), # Define a color palette
main = "Reference Matrix")
hm
$rowInd
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13
$colInd
[1] 1 2 3 4 5 6 7 8 9
$Rowv
NULL
$Colv
NULL
Plot with ggplot Figure 3A
hm
pdf("/Users/rhalenathomas/Documents/Projects_Papers/PhenoID/ForFigures/June2023/ReferenceMatrix.pdf", width = 6.5, height = 5)
hm
dev.off()
quartz_off_screen
2
Calculate correlations
Plot correlations Figure 3B, C and S3
p.cor2 <- plot_corr(cor2, threshold = 0.1, min_cells = 500)
Using X, best.cell.type, second.cell.type, cell.label, cell.label.ft, new_cell.label as id variables
Using X, best.cell.type, second.cell.type, cell.label, cell.label.ft, new_cell.label as id variables
Using X, best.cell.type, second.cell.type, cell.label, cell.label.ft, new_cell.label as id variables
pdf(paste(fig_outs,"CorPlots_thresh01.pdf",sep = ""))
p.cor2
[[1]]
[[2]]
[[3]]
[[4]]
Warning: Removed 1 rows containing missing values (`geom_violin()`).
[[5]]
[[6]]
[[7]]
[[8]]
dev.off()
null device
1
Adjust pair of cell types plots to plot mixes with higher cell counts Figures S4,S5,S6
pdf(paste(fig_outs,"CAMcorpairs_thresh0553.pdf",sep=""))
p1
dev.off()
null device
1
# Define the threshold number of pairs
df <- cor3 # threshold 0.35
threshold <- 150
# Filter the double-labeled cells
double.cells <- df[grep("-", df$cell.label),]
label_counts <- table(double.cells$cell.label)
filters_labels <- names(label_counts[label_counts >= threshold])
# Filter the double-labeled cells based on the filtered labels
filtered_double.cells <- double.cells[double.cells$cell.label %in% filters_labels, ]
# Melt the filtered double-labeled cells for plotting
df.melt.filtered <- melt(filtered_double.cells)
Using X, best.cell.type, second.cell.type, cell.label, cell.label.ft as id variables
# Plot the filtered double-labeled cells
p2 <- ggplot(df.melt.filtered, aes(x = variable, y = value, colour = variable, group = X))+
geom_line(show.legend = FALSE, size = 0.1, color = "black") +
geom_point() +
scale_color_manual(values = c("#4E84C4", "#52854C","purple","orange")) +
ylim(0.1, 0.8) +
facet_wrap(~ as.factor(cell.label)) +
ylab("Correlation Coefficient") +
xlab("")
p2
pdf(paste(fig_outs,"CAMcorpairs_thresh35.pdf",sep=""))
p2
dev.off()
quartz_off_screen
2
Make plots for Figure 3B and C
plot1b
plot2
Warning: Removed 3 rows containing missing values (`geom_violin()`).
Figure 3 clustering
This is with the subsample of 9000 cells per hMO Figure 3D and E
Add different correlation assignments and visualize on UMAP from the low threshold for CAM
Visualize CAM with the signifance 0.553 threshold
seu <- AddMetaData(object=seu, metadata= cor1$cell.label, col.name = 'cor.labels05')
# see the labels added
unique(seu$cor.labels05)
[1] "Unassigned" "astrocytes"
[3] "neurons" "RG"
[5] "OPC" "Epithelial"
[7] "NPC" "neurons-NPC"
[9] "oligodendrocyte" "Endothelial"
[11] "neurons-oligodendrocyte" "OPC-neurons"
[13] "NPC-stemlike" "oligodendrocyte-neurons"
[15] "neurons-OPC" "RG-astrocytes"
[17] "astrocytes-RG" "NPC-neurons"
[19] "oligodendrocyte-NPC" "stemlike"
[21] "NPC-oligodendrocyte" "stemlike-NPC"
[23] "Epithelial-NPC" "RG-NPC"
[25] "OPC-oligodendrocyte" "Endothelial-Epithelial"
[27] "Epithelial-Endothelial" "Epithelial-stemlike"
[29] "RG-Epithelial" "oligodendrocyte-OPC"
[31] "astrocytes-stemlike" "RG-oligodendrocyte"
[33] "neurons-stemlike" "Epithelial-RG"
DimPlot(seu, group.by = 'cor.labels05', label = TRUE) + theme(legend.position = "none")
Visualize CAM assignments with threshold R = 0.35
seu <- AddMetaData(object=seu, metadata= cor3$cell.label, col.name = 'cor.labels035')
# see the labels added
#unique(seu$cor.labels01)
# plot the cluster predictions
#plot_lab_clust(seu, seu$RNA_snn_res.0.7, seu$cor.labels01)
DimPlot(seu, group.by = 'cor.labels035', label = TRUE) + theme(legend.position = "none")
# removing the combined cell labels that are low frequency will improve visualization
# Check if the cell type has a frequency less than 500 and assign new label accordingly
# Check if the cell type has a frequency less than 500 and assign new label accordingly
cor3$cell.label.ft <- ifelse(cor3$cell.label %in% freq.cor3.df$cell.label[freq.cor3.df$Freq < 200], "few", cor3$cell.label)
seu <- AddMetaData(object=seu, metadata= cor3$cell.label.ft, col.name = 'cor.labels035ft')
unique(seu$cor.labels035ft)
[1] "astrocytes" "RG"
[3] "OPC" "neurons"
[5] "RG-Epithelial" "neurons-oligodendrocyte"
[7] "Endothelial" "Epithelial-Endothelial"
[9] "oligodendrocyte-OPC" "unassigned"
[11] "Epithelial" "NPC"
[13] "oligodendrocyte" "oligodendrocyte-neurons"
[15] "few" "Epithelial-RG"
[17] "neurons-OPC" "stemlike"
[19] "OPC-oligodendrocyte" "RG-astrocytes"
[21] "astrocytes-RG" "Endothelial-Epithelial"
[23] "OPC-neurons"
DimPlot(seu, group.by = 'cor.labels035ft', label = TRUE,
label.size = 8, repel = TRUE) + theme(legend.position = "none")
NA
NA
Get the top assigned cells per cluster for each threshold
cor.ann.035 <- get_annotation(seu, seu.cluster = seu$RNA_snn_res.0.7,
seu.label = seu$cor.labels035, top_n = 3,
filter_out = c("Unknown","unknown","Mixed",
"unassigned","Unassigned"),
Label = "CAM")
[1] "filtering"
cor.ann.01 <- get_annotation(seu, seu.cluster = seu$RNA_snn_res.0.7,
seu.label = seu$cor.labels01, top_n = 3,
filter_out = c("Unknown","unknown","Mixed","
unassigned","Unassigned"),
Label = "CAM")
[1] "filtering"
cor.ann.05 <- get_annotation(seu, seu.cluster = seu$RNA_snn_res.0.7,
seu.label = seu$cor.labels05, top_n = 3,
filter_out = c("Unknown","unknown","Mixed",
"unassigned","Unassigned"),
Label = "CAM")
[1] "filtering"
Visualize Marker expression
length(unique(seu$RNA_snn_res.0.7))
[1] 19
# 19
# if we want to plot by cluster we need a vector of from 0 to the n-1 clusters
cluster.num <- c(0:18)
plotmean(plot_type = 'heatmap',seu = seu, group = 'RNA_snn_res.0.7', markers = AB,
var_names = cluster.num, slot = 'scale.data', xlab = "Cluster",
ylab = "Antibody")
NA
NA
Feature plots
Idents(seu) <- "RNA_snn_res.0.7"
for (i in AB) {
print(FeaturePlot(seu, features = i, min.cutoff = 'q1', max.cutoff = 'q97', label = TRUE))
}
NA
NA
pdf(paste(fig_outs,"HM9000_fig3E.pdf"),width = 8.5, height = 5)
DoHeatmap(seu, features = AB, size= 6,slot = "scale.data", group.colors = clust.colours, disp.max = 2, disp.min = -1.5,
angle = 90) + scale_fill_gradientn(colors = c("#154c79", "#eeeee4", "#e28743")) +
theme(axis.text.y = element_text(size = 15))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
dev.off()
null device
1
seu <-readRDS("/Users/rhalenathomas/Documents/Data/FlowCytometry/PhenoID/Analysis/NatMethodJuneSubmission/Seu9000lablesJune23.RDS")
Warning message:
R graphics engine version 15 is not supported by this version of RStudio. The Plots tab will be disabled until a newer version of RStudio is installed.
Train a Random Forest classifier
markers <- rev(c("CD24","CD56","CD29","CD15","CD184","CD133","CD71","CD44","GLAST","AQP4","HepaCAM", "CD140a","O4"))
rf <- RFM_train(seurate_object = seu,
AB_list = markers, annotations = seu$labels,
split = c(0.5,0.5),
downsample = "none",
seed = 222,
mytry = c(1:10),
maxnodes = c(10: 20),
trees = c(250, 500, 1000,2000),
start_node = 15)